h x
x h
h x
h
x
x
f
h = f(x)
V
W
h=f(x)
r=g(h)
x
x
r h
f x h g h r
g
r = g(h) = g(f(x))
L L(r, x)
r x
L {x}
h x
x
h ||
h
i
x
|| ||
h
i
x
||
h
i
x
$h = f(x) = \arg\min_{h} L(g(h), x) + \lambda \Omega(h)$
L f
g Ω(h)
Ω(h)
$|h|_1 = \sum_i |h_i|$
t
$\sum_i \log(1 + \alpha^2 h_i^2)$
αh
i
t
$-\sum_i \left( t \log h_i + (1 - t) \log(1 - h_i) \right),$
$t$ $h_i \in (0, 1)$
$\left\| \frac{\partial h}{\partial x} \right\|_F^2$
h
i
(x)
x
h
i
x x
x
x h
h
i
= 0
$f(x)$ $\|f(x) - f(y)\| < \|x - y\|$ $x$ $y$
$\|f'(x)\| < 1$
x
r = g(f(x)) x x +
x
r
x x
x
h
log p(h)
p(h)
f
h
log p(h)
h
x
f g L
L
x
p(x | h)
L
p(x | h)
x h
x
h h
$p(x \mid h) = \prod_i p(x_i \mid h)$
x
i
| h
x
i
|h x
i
g(h)
p(x | h)
f(x) q(h | x)
h
q(h | x) p(x | h)
x
h
Q(h|x)
P (x|h)
q(h | x)
p(x | h) p = q
p(x, h)
q(h | x) = p(h | x)
q(h | x) p(x | h)
q(h | x) p(x | h)
h x
h p(h),
P (x|h)
h P(h)
x = Wh+ b + noise
x h
p(h)
x = W h + b + noise
$h \sim \mathcal{N}(0, I)$
x
i
h
$\psi = \mathrm{diag}(\sigma^2)$ $\sigma^2 = (\sigma_1^2, \sigma_2^2, \ldots)$
x
i
x
$x \sim \mathcal{N}(b, W W^\top + \psi)$
W x
i
x
j
x
i
$\hat{h}_k = W_k x$
w
ki
k
ˆ
h
k
x
j
w
kj
σ
i
$x$ $W W^\top + \sigma^2 I$
σ
2
$x \sim \mathcal{N}(b, W W^\top + \sigma^2 I)$
x = W h + b + σz
$z \sim \mathcal{N}(0, I)$
W σ
2
h
σ
2
σ 0 h
x d
W
h
i
x
σ 0 d
W
$p(h) = \prod_i p(h_i).$
p(x | h)
p(h)
h h
h = Uz
U
$z = U^\top h,$
$h \sim (0, I)$ $z$
$\mathrm{Var}[z] = E[z z^\top] = E[U^\top h h^\top U] = U^\top \mathrm{Var}[h] U = U^\top U = I.$
x
s = V x
h
x
i
h
j
h
j
$x = f(h) + \text{noise}$
h p(h)
$p(h) = \prod_i p(h_i) = \prod_i \frac{\lambda}{2} e^{-\lambda |h_i|}$
t
$p(h) = \prod_i p(h_i) \propto \prod_i \frac{1}{\left(1 + \frac{h_i^2}{\nu}\right)^{\frac{\nu+1}{2}}}.$
h
i
= 0
p(h | x)
h = 0
h
$h = f(x) = \arg\min_{h} L(g(h), x) + \lambda \Omega(h)$
$L(g(h), x)$ $-\log p(x \mid g(h))$ $\Omega(h)$ $-\log p(h)$
h
i
p(h)
x h
$L = -\log p(x \mid h)$
h
x
x
f
g
$L = -\log P(x \mid g(f(x)))$
g x
$h = f(x)$ $L = -\log p(x \mid g(f(x)))$
g(f(x))
g(f(x))
p(x | h)
$p(x \mid h) = \prod_i p(x_i \mid h)$ $x_i \mid h$
g(h)
p(x | h) = p(x | g(h))
p(x | h)
p(x)
q(h | x) q(h | x)
p(h)
h = f(x)
$h^{*}(x) = \arg\max_{h} \log p(h \mid x) = \arg\min_{h} \frac{\|x - (b + W h)\|^2}{\sigma^2} - \log p(h)$
σ
2
p(h)
h = 0
$p(h_i) = \frac{\lambda}{2} e^{-\lambda |h_i|}$
λ
t
$p(h_i) \propto \frac{1}{\left(1 + \frac{h_i^2}{\nu}\right)^{\frac{\nu+1}{2}}}.$
x
h x
x
x h
Ω(h)
$L = -\log p(x \mid g(h)) + \Omega(h)$
g(h) h = f(x)
Ω(h)
h
$\frac{\lambda}{2} e^{-\lambda |h_i|}$
$\Omega(h) = \lambda \sum_i |h_i|$
$-\log p(h) = \sum_i \left( -\log \frac{\lambda}{2} + \lambda |h_i| \right) = \text{const} + \Omega(h)$
λ h
λ
t
$\Omega(h) = \sum_i \frac{\nu + 1}{2} \log\left(1 + \frac{h_i^2}{\nu}\right)$
ν
h
i
ρ = 0.05
$\Omega(h) = -\sum_i \left( \rho \log h_i + (1 - \rho) \log(1 - h_i) \right)$
$0 < h_i < 1$ $h_i = \mathrm{sigmoid}(a_i)$
p = h
i
p = ρ
h
x θ
$\arg\max_{\theta} p(\theta \mid x) = \arg\max_{\theta} \left( \log p(x \mid \theta) + \log p(\theta) \right)$
θ
h
f(x)
$L = \arg\min_{h} \|x - g(h)\|^2 + \lambda |h|_1 + \gamma \|h - f(x)\|^2$
f g
x h
h = f(x)
h h
g f
f
f
g f
f
f
x
˜x
$C(\tilde{x} \mid x)$
f
g
$L = -\log P(x \mid g(f(\tilde{x})))$
$h = f(\tilde{x})$
x
˜
x
$L = -\log p(x \mid g(f(\tilde{x})))$
˜
x x
C(
˜
x | x)
C(
˜
x | x)
˜
x x
p(x |
˜
x)
(x,
˜
x)
x = x
˜
x =
˜
x C(
˜
x |
x = x)
(x,
˜
x)
p(x |
˜
x) = p(x | g(h)) h f(
˜
x)
g(h)
log p(x | h)
x
x
˜
x
$-E_{x \sim q(x)} \, E_{\tilde{x} \sim C(\tilde{x} \mid x)} \log p(x \mid g(f(\tilde{x})))$
q(x)
$C(\tilde{x} \mid x)$
x
˜x
˜x
$g(f(\tilde{x}))$ $E[x \mid \tilde{x}]$
x
˜
x C(
˜
x | x)
x
$\|g(f(\tilde{x})) - x\|^2$
g(f(
˜
x)) E[x |
˜
x]
x
˜
x
g(f(x)) x
log q(x)
x
q
$(g(f(x)) - x)$
log q(x)
x
x
||g(f(
˜
x)) x||
2
$C(\tilde{x} = \tilde{x} \mid x) = \mathcal{N}(\tilde{x};\, \mu = x,\, \Sigma = \sigma^2 I)$
σ
2
$\frac{g(f(x)) - x}{\sigma^2}$ $\frac{\partial \log q(x)}{\partial x}$
q(x)
$\frac{g(f(x)) - x}{\sigma^2} \to \frac{\partial \log q(x)}{\partial x},$
f g
g(f(x))
x
x
g(f(x)) x
h = f(x) f
$\Omega(h) = \left\| \frac{\partial f(x)}{\partial x} \right\|_F^2$
h
h
x
f (x)
x
f(x) x
f(x)
x
f
h
f (x)
x
×
h
x
f
g
f
f g
g f